#!/usr/bin/env bash
#
# Copyright by The HDF Group.
# All rights reserved.
#
# This file is part of HDF5.  The full HDF5 copyright notice, including
# terms governing use, modification, and redistribution, is contained in
# the COPYING file, which can be found at the root of the source code
# distribution tree, or in https://www.hdfgroup.org/licenses.
# If you do not have access to either file, you may request a copy from
# help@hdfgroup.org.
#
BLD='\033[1m'
GRN='\033[0;32m'
RED='\033[0;31m'
PUR='\033[0;35m'
CYN='\033[0;36m'
NC='\033[0m' # No Color

############################################################
# Usage                                                    #
############################################################
function usage() {
   echo ""
   # Display usage
   echo "Purpose: Combine subfiles into a single HDF5 file. Requires the subfiling
         configuration file either as a command-line argument or the script will
         search for the *.config file in the current directory."
   echo ""
   echo "usage: h5fuse.sh [-f filename] [-h] [-p] [-q] [-r] [-v] "
   echo "-f filename  Subfile configuration file."
   echo "-h           Print this help."
   echo "-q           Quiet all output. [no]"
   echo "-p           h5fuse.sh is being run in parallel, with more than one rank. [no]"
   echo "-r           Remove subfiles after being processed. [no]"
   echo "-v           Verbose output. [no]"
   echo ""
}

function gen_mpi() {

# Program to determine MPI rank and size if being run in parallel (-p).

cat > "${c_src}" << EOL
#include <mpi.h>
#include <stdio.h>
int main() {
    MPI_Init(NULL, NULL);
    int world_size;
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    int world_rank;
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
    printf("%d %d", world_rank, world_size);
    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Finalize();
}
EOL

}

############################################################
############################################################
# Main program                                             #
############################################################
############################################################

############################################################
# Process the input options. Add options as needed.        #
############################################################
# Get the options
file_config=""
verbose="false"
quiet="false"
rm_subf="false"
parallel="false"
while getopts "hpqrvf:" option; do
   case $option in
      f) # subfiling configuration file
         file_config=$OPTARG;;
      h) # display Help
         usage
         exit;;
      p) # HDF5 fused file
         parallel="true";;
      q) # quiet all output
         quiet="true";;
      r) # remove completed subfiles
         rm_subf="true";;
      v) # verbose output
         verbose="true";;
     \?) # Invalid option
         echo -e "$RED ERROR: Invalid option ${BLD}-${OPTARG}${RED} $NC"
         usage
         exit 1;;
     * ) usage
         exit 1;;
   esac
done

FAILED=1
############################################################
# Configure file checks                                    #
############################################################
#
SUBF_CONFDIR="${H5FD_SUBFILING_CONFIG_FILE_PREFIX:-$PWD}"

# Try to find the config file
if [ -z "$file_config" ]; then
    nfiles=$(find "$SUBF_CONFDIR" -maxdepth 1 -type f -iname "*.config" -printf '.' | wc -m)
    if [[ "$nfiles" != "1" ]]; then
      if [[ "$nfiles" == "0" ]]; then
         echo -e "$RED Failed to find .config file in ${SUBF_CONFDIR} $NC"
         usage
         exit $FAILED
      else
         echo -e "$RED More than one .config file found in ${SUBF_CONFDIR} $NC"
         usage
         exit $FAILED
      fi
    fi
    file_config=$(find "${SUBF_CONFDIR}" -maxdepth 1 -type f -iname '*.config')
fi

if [ ! -f "$file_config" ]; then
    echo -e "${RED} configuration file ${BLD}$file_config${NC} ${RED}does not exist. $NC"
    exit $FAILED
fi

stripe_size=$(grep "stripe_size=" "$file_config"  | cut -d "=" -f2)
if test -z "$stripe_size"; then
    echo -e "$RED failed to find stripe_size in $file_config $NC"
    exit $FAILED
fi

hdf5_file="$(grep "hdf5_file=" "$file_config"  | cut -d "=" -f2)"
if test -z "$hdf5_file"; then
    echo -e "$RED failed to find hdf5 output file in $file_config $NC"
    exit $FAILED
fi

subfile_dir="$(grep "subfile_dir=" "$file_config"  | cut -d "=" -f2)"
if test -z "$subfile_dir"; then
    echo -e "$RED failed to find subfile directory in $file_config $NC"
    exit $FAILED
fi

# For bash 4.4+
subfs=$(sed -e '1,/subfile_dir=/d' "$file_config")
mapfile -t subfiles <<< "$subfs"
if [ ${#subfiles[@]} -eq 0 ]; then
    echo -e "$RED failed to find subfiles list in $file_config $NC"
    exit $FAILED
fi
nsubfiles=${#subfiles[@]}

# Get the number of local subfiles
subfiles_loc=()
subfiles_size=()
for i in "${subfiles[@]}"; do
    subfile="${subfile_dir}/${i}"
    if [ -f "${subfile}" ]; then
        subfiles_loc+=("$subfile")
        subfiles_size+=($(wc -c "${subfile}" | awk '{print $1}'))
    else
        subfiles_size+=(0)
    fi
done

START="$(date +%s%N)"

mpi_rank=0
mpi_size=1
nstart=1
nend=$nsubfiles

if [ "$parallel" == "true" ]; then

    hex=$(hexdump -n 16 -v -e '/1 "%02X"' /dev/urandom)
    c_exec="h5fuse_"${hex}
    c_src=${c_exec}.c

    # Generate and compile an MPI program to get MPI rank and size
    if [ ! -f "${c_src}" ]; then
        gen_mpi
        CC=@CC@
        ${CC} "${c_src}" -o "${c_exec}"
    fi
    wait
    rank_size=$(./"${c_exec}")
    read -r mpi_rank mpi_size <<<"$rank_size"

    rm -f "${c_src}" "${c_exec}"

    # Divide the subfiles among the ranks
    iwork1=$(( nsubfiles / mpi_size ))
    iwork2=$(( nsubfiles % mpi_size ))
    min=$(( mpi_rank < iwork2 ? mpi_rank : iwork2 ))
    nstart=$(( mpi_rank * iwork1 + 1 + min ))
    nend=$(( nstart + iwork1 - 1 ))
    if [ $iwork2 -gt "$mpi_rank" ]; then
        nend=$(( nend + 1 ))
    fi
fi

############################################################
# COMBINE SUBFILES INTO AN HDF5 FILE                       #
############################################################
icnt=1
skip=0
seek=0
seek_cnt=0
for i in "${subfiles[@]}"; do

    subfile="${subfile_dir}/${i}"

    # bs=BYTES read and write up to BYTES bytes at a time; overrides ibs and obs
    # ibs=BYTES read up to BYTES bytes at a time
    # obs=BYTES write BYTES bytes at a time
    # seek=N skip N obs-sized blocks at start of output
    # skip=N skip N ibs-sized blocks at start of input

    status=1
    fsize=${subfiles_size[icnt-1]}
    if [ "$fsize" -eq "0" ]; then
       seek_cnt=$((seek_cnt+1))
       seek=$seek_cnt
       if [ "$rm_subf" == "true" ]; then
           if [ -f "${subfile}" ]; then
               \rm -f "$subfile"
           fi
       fi
    else
       if [ $icnt -ge "$nstart" ] && [ $icnt -le "$nend" ]; then
          records_left=$fsize
          while [ "$status" -gt 0 ]; do
              if [ $((skip*stripe_size)) -le "$fsize"  ] && [ "$records_left" -gt 0 ]; then
                  EXEC="dd count=1 bs=$stripe_size if=$subfile of=$hdf5_file skip=$skip seek=$seek conv=notrunc"
                  if [ "$verbose" == "true" ]; then
                      echo -e "$GRN $EXEC $NC"
                  fi
                  err=$( $EXEC 2>&1 1>/dev/null )
                  if [ $? -ne 0 ]; then
                     echo -e "$CYN ERR: dd Utility Failed $NC"
                     echo -e "$CYN MSG: $err $NC"
                     exit $FAILED
                  fi
                  records_left=$((records_left-stripe_size))
                  skip=$((skip+1))
                  seek=$((seek_cnt+skip*nsubfiles))
              else
                  status=0
                  skip=0
              fi
          done; wait
          if [ "$rm_subf" == "true" ]; then
              \rm -f "$subfile"
          fi
       fi
       seek_cnt=$((seek_cnt+1))
       seek=$seek_cnt
    fi
    icnt=$(( icnt +1 ))
done; wait

END=$(( $(date +%s%N) - START ))
DURATION_SEC=$(awk -vp="$END" -vq=0.000000001 'BEGIN{printf "%.4f" ,p * q}')
if [ "$quiet" == "false" ]; then
    echo -e "$PUR COMPLETION TIME = $DURATION_SEC s $NC"
fi